//Copyright 2010 Neurosciences Research Foundation, Incorporated
#include <stdlib.h>
#include <stdint.h>
#include <mpi.h>
#include <iostream>
#include <limits.h>
#include <vector>
#include <math.h>
#include <fstream>

#ifndef STRINGIZE
#define STRINGIZE(x) (#x)
#endif // !STRINGIZE

#ifndef MAKE_STRING
#define MAKE_STRING(x) STRINGIZE(x)
#endif // !MAKE_STRING

#ifndef COMM_H
#define COMM_H

//#define UPSAMPLE 2
const int STIM_ORIG=0;
const int STIM_PROBE=1;

#include "generator.h"
UnifRandGen my_generator = UnifRandGen();

enum dummydata_indexes {
	START = 0,
	STOP = 1
};

struct file_input {
	int external_group;
	int start_second;
	int stop_second;
	int first_file;
	int last_file;
	int files_read;
	bool loop_flag;
	int zero_pad_loop;
	int zero_pad_pattern;
	bool wm_phase_flag;
	int wm_state;
	int probe_counter;
	int stim_counter;
	int zp_counter;
	
};

struct comm {
	
	struct colors {
		enum constants {
			send,
			receive,
			master,
			slave
		};
	};
	
	static int world_rank;
	
	static int network_rank;
	static MPI_Comm network_comm;
	
	static int exchange_rank;
	static MPI_Comm exchange_comm;
	
	// FOR DISPLAY:
	static int display_rank;
	static MPI_Comm display_comm;
	
	static int color;
	
	static void init(int color);
};


#endif // !COMM_H



int comm::world_rank;

int comm::network_rank;
MPI_Comm comm::network_comm;

int comm::exchange_rank;
MPI_Comm comm::exchange_comm;

int comm::color;

// FOR DISPLAY:
int comm::display_rank;
MPI_Comm comm::display_comm;

void comm::init(int color)
{
	// world rank
	MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
	
	// split
	MPI_Comm_split(MPI_COMM_WORLD, color, world_rank, &network_comm);
	MPI_Comm_rank(network_comm, &network_rank);
	
	// split roots
#define MASTER 2
#define SLAVE 3
	
	MPI_Comm_split(MPI_COMM_WORLD,
				   network_rank == 0 ? MASTER : SLAVE,
				   color == colors::send ? 0 : 1,
				   &exchange_comm);
	
	MPI_Comm_rank(exchange_comm, &exchange_rank);

#ifdef DISPLAY_WINDOW
	MPI_Comm_split(MPI_COMM_WORLD, 101, world_rank+1, &display_comm);
	MPI_Comm_rank(display_comm, &display_rank);
#endif
	
};


// It returns a random permutation of 0..n-1
int* rand_perm(int n) {
	int *toReturn = (int*)malloc(n*sizeof(int));
	int k;
	for (k = 0; k < n; k++)
		toReturn[k] = k;
    	for (k = n-1; k > 0; k--) {
		int j = rand() % (k+1);
		int temp = toReturn[j];
		toReturn[j] = toReturn[k];
		toReturn[k] = temp;
    	}
    	return toReturn;
}



void read_input_spikes_from_file (int t_sec, int *spike_counts, int **spikes, int **bounds_matrix, int max_spikes_msec, std::vector<file_input*> file_stims)
{
	int temp_time, temp_index, current_file, stim, i, g;
	char spikes_filename[128];
	FILE *fspikes_in;
	static bool firsttime=true;
	static int wm_a = 0;
	static int wm_b = 0;
	static int wm_c = 0;
	static int wm_match = 0;
	static int wm_count = 0;

	static int *perm_vector;

	std::fstream patts;

	if (firsttime) {
		firsttime=false;
		patts.open ("ds_patthist.dat", std::fstream::out | std::fstream::trunc);		
	} else {
		patts.open ("ds_patthist.dat", std::fstream::out | std::fstream::app);
	}

	for (i = 0; i < 1000; i++)
		spike_counts[i] = 0;
		
	for ( stim = 0; stim < file_stims.size(); stim++ )
	{
		if ( (t_sec >= file_stims[stim]->start_second) && (t_sec < file_stims[stim]->stop_second) )
		{
			bool probe_stim_now = false;
			
			// increment to the next file to read
			current_file = file_stims[stim]->first_file + file_stims[stim]->files_read;
			file_stims[stim]->files_read++;
/*
			if ( (file_stims[stim]->wm_phase_flag) )
			{
				wm_count++;
				if (wm_count == 1) {
					current_file = file_stims[stim]->first_file + wm_a;
				} else if (wm_count == 2) {
					current_file = file_stims[stim]->first_file + wm_b;
				} else if (wm_count == 3) {
					current_file = file_stims[stim]->first_file + wm_c;
				} else if (wm_count == 10) {
					current_file = file_stims[stim]->first_file + wm_match;
					wm_match++;
					wm_count = 0;
				} else if (wm_count > 10) {
					wm_count = 0;
					continue;				
				} else {
					continue;				
				}
				
				if (wm_match > 3) {
					wm_match = 0;
					wm_c++;
					if (wm_c > 3) {
						wm_c = 0;
						wm_b++;
						if (wm_b > 3) {
							wm_b = 0;
							wm_a++;
							if (wm_a > 3) {
								wm_a = 0;
							}
						}
					}
				}
			}
*/			
			if ( (current_file > file_stims[stim]->last_file) )
			{
				if ( ((current_file <= ( file_stims[stim]->last_file + file_stims[stim]->zero_pad_loop )) && (file_stims[stim]->loop_flag)) || (!file_stims[stim]->loop_flag) )
				{
					continue;
				}
				else
				{
					file_stims[stim]->files_read = 0;
					current_file = file_stims[stim]->first_file;
					file_stims[stim]->files_read++;			
					file_stims[stim]->zp_counter = file_stims[stim]->zero_pad_pattern;
				}
			}

			if ( (file_stims[stim]->wm_phase_flag) )
			{
				if (file_stims[stim]->files_read == 1)
				{
					perm_vector = rand_perm(file_stims[stim]->last_file - file_stims[stim]->first_file + 1);
				}
				current_file = file_stims[stim]->first_file + perm_vector[file_stims[stim]->files_read - 1];
			}
			
			sprintf (spikes_filename, "%s/dummydata/spikes%d000.dat", MAKE_STRING(SIMDIR), current_file);
			fspikes_in = fopen (spikes_filename,"r");
			patts << t_sec*1000 << " " << current_file <<  std::endl;
			
			if (fspikes_in == NULL)
				break;
			
			std::cerr << std::endl << std::endl << "Reading In: " << spikes_filename << ", sec = " << t_sec << " --- DEBUG: " << wm_count << std::endl;
			i = 0;
			while ( fscanf(fspikes_in, "%d %d", &temp_time, &temp_index) != EOF )
			{
				
				// early escape from loop during WM STIMULUS B to check if we can get a shorter presentation
				if ( temp_time%1000 > 925 )
					break;
				
				g = file_stims[stim]->external_group;
				if ( (temp_index >= bounds_matrix[g][START]) && (temp_index <= bounds_matrix[g][STOP]) )
				{
					if ( spike_counts[ temp_time%1000 ] == max_spikes_msec )
					{
						std::cerr << "Number of external spikes for a single millisecond EXCEEDS number of external neurons!" << std::endl;
					}
					else
					{
#ifdef UPSAMPLE
						int x = (temp_index - bounds_matrix[g][START]) % 21;
						int y = (temp_index - bounds_matrix[g][START]) / 21;
						int x_bar = x * UPSAMPLE;
						int y_bar = y * UPSAMPLE;
						int new_index = x_bar + y_bar * 40;
						spikes[ temp_time%1000 ][ spike_counts[ temp_time%1000 ] ] = new_index;
#else
						spikes[ temp_time%1000 ][ spike_counts[ temp_time%1000 ] ] = temp_index;
#endif
						spike_counts[ temp_time%1000 ]++;
					}
				}
			}
			fclose (fspikes_in);
		}
	}
};


int** get_external_group_size (int *num_neurons, int *num_groups)
{
	int **bounds_matrix;
	int total_size, xTo, yTo, xFrom, yFrom, start_i_from, stop_i_from, counter, i;
	char area[8], type[8], external_filename[128];
	FILE *fin;
	
	sprintf (external_filename, "%s/groups_external.dat", MAKE_STRING(SIMDIR));
	fin = fopen (external_filename,"r");
	
	if (fin == NULL)
	{
		std::cerr << "No external groups file found!" << std::endl;
		*num_groups = 0;
		*num_neurons = 0;
		return NULL;
	}
	
	total_size = 0;
	counter = 0;
	while ( fscanf(fin, "%s", area) != EOF )
	{
		fscanf(fin, "%s", type);
		fscanf(fin, "%d %d %d %d", &xFrom, &yFrom, &start_i_from, &stop_i_from);
		
		total_size = total_size + (xFrom * yFrom);
		counter++;
	}
	rewind (fin);
	
	// MALLOC 2D MATRIX!!!
	bounds_matrix = (int **)malloc( sizeof(int *) * counter );
	if ( bounds_matrix == NULL )
	{
		fprintf(stderr,"Cannot allocate memory --- dummy_sym\n");
		exit(0);
	}
	for (i = 0; i < counter; i++) 
	{
		bounds_matrix[i] = (int *)malloc( sizeof(int) * 2 );
		if ( bounds_matrix[i] == NULL )
		{
			fprintf(stderr,"Cannot allocate memory --- dummy_sym\n");
			exit(0);
		}
	}
	
	counter = 0;
	while ( fscanf(fin, "%s", area) != EOF )
	{
		fscanf(fin, "%s", type);
		fscanf(fin, "%d %d %d %d", &xFrom, &yFrom, &start_i_from, &stop_i_from);
		bounds_matrix[counter][START] = start_i_from;
		bounds_matrix[counter][STOP] = stop_i_from;
		counter++;
	}
	*num_groups = counter;
	*num_neurons = total_size;
	
	fclose (fin);
	return bounds_matrix;
};


using namespace std;

int main(int argc, char* argv[])
{
	MPI_Init(&argc, &argv);
	
	comm::init(comm::colors::send);
	
	int numprocs;
	MPI_Comm_size(MPI_COMM_WORLD,&numprocs);
	cout << numprocs << endl;
	
	int length = 0;
	int num_external_neurons;
	int num_external_groups;
	int *spike_count_buffer;
	int **spike_buffer;
	int **external_groups_bounds;
	
	// READ IN DUMMY GROUPS
	external_groups_bounds = get_external_group_size(&num_external_neurons, &num_external_groups);
	std::cerr << "*** Total External Groups " << num_external_groups << std::endl;
	for (int i = 0; i < num_external_groups; i++)
		std::cerr << "*** External Group " << i+1 << ", From: " << external_groups_bounds[i][START] << " - " << external_groups_bounds[i][STOP] << std::endl;
	
	spike_count_buffer = (int *)malloc( sizeof(int) * 1000 );
	spike_buffer = (int **)malloc( sizeof(int *) * 1000 );
	if ( (spike_count_buffer == NULL) | (spike_buffer == NULL) )
	{
		fprintf(stderr,"Cannot allocate memory --- dummy_sym\n");
		exit(0);
	}
	for (int i = 0; i < 1000; i++) {
		spike_buffer[i] = (int *)malloc( sizeof(int) * num_external_neurons );
		if ( spike_buffer[i] == NULL )
		{
			fprintf(stderr,"Cannot allocate memory --- dummy_sym\n");
			exit(0);
		}
	}

	// CREATE EXTERNAL SPIKE INPUTS
	std::vector <file_input*> external_inputs;
	external_inputs.resize(2); // MAKE SURE TO CHANGE THE SIZE WHEN ADDING FILE INPUTS
	{
		/*
			HOW TO USE:
			
			external_inputs[i] = (file_input *)malloc( sizeof(file_input) );
			external_inputs[i]->external_group = 0;
			external_inputs[i]->start_second = 0;
		 	external_inputs[i]->stop_second = 3600;
		 	external_inputs[i]->first_file = 1;
		  	external_inputs[i]->last_file = 59;
			external_inputs[i]->files_read = 0; <-- ***MUST BE ZERO***
			external_inputs[i]->zero_pad_loop = 1;
			external_inputs[i]->loop_flag = true;
		 
			This will read the spikes for the 1st group in 'groups_external.dat' and enter them into the
			simulation for the first 1 hour of the simulation (0 - 3600 seconds). It will start with
			file 'spikes1000.dat' and go to 'spikes59000.dat'. Since the loop variable is true, 
			it will then add 1 second of no input (zeor_pad) and then loop to the beginning and use 'spikes1000.dat' 
			again. This will continue until continue until simulation time reaches 3600 seconds. The files_read 
			variable should always be set to 0 initially.
		 
		 NOTE: now the zero pad comes in both loop (original) and per pattern flavors.  at the end of a loop that number is used, not the pattern-wise one
		*/
	
		int index=0;
		
		//pattern training:3 sec each, 5 patterns
		external_inputs[index] = (file_input *)malloc( sizeof(file_input) );
		external_inputs[index]->external_group = 0;
		external_inputs[index]->start_second = 0;
		external_inputs[index]->stop_second = 140;
		external_inputs[index]->first_file = 1;
		external_inputs[index]->last_file = 8;
		external_inputs[index]->files_read = 0;
		external_inputs[index]->loop_flag = true;
		external_inputs[index]->zero_pad_loop = 0;
		external_inputs[index]->zero_pad_pattern = 0;
		external_inputs[index]->wm_phase_flag = false;
		external_inputs[index]->wm_state = STIM_ORIG;
		external_inputs[index]->probe_counter = 0;
		external_inputs[index]->stim_counter = 0;
		external_inputs[index]->zp_counter = external_inputs[index]->zero_pad_pattern;
		
		index++;
/*		
		for (int iii=0; iii<20; iii++) {

			external_inputs[index] = (file_input *)malloc( sizeof(file_input) );
			external_inputs[index]->external_group = 0;
			external_inputs[index]->start_second = 140 + 2*iii + 1;
			external_inputs[index]->stop_second = 140 + 2*(iii+1);
			external_inputs[index]->first_file = iii%8 + 1;
			external_inputs[index]->last_file = external_inputs[index]->first_file;
			external_inputs[index]->files_read = 0;
			external_inputs[index]->loop_flag = true;
			external_inputs[index]->zero_pad_loop = 0;
			external_inputs[index]->zero_pad_pattern = 0;
			external_inputs[index]->wm_phase_flag = false;
			external_inputs[index]->wm_state = STIM_ORIG;
			external_inputs[index]->probe_counter = 0;
			external_inputs[index]->stim_counter = 0;
			external_inputs[index]->zp_counter = external_inputs[index]->zero_pad_pattern;

			index++;
		}
*/			
		//wm testing
		external_inputs[index] = (file_input *)malloc( sizeof(file_input) );
		external_inputs[index]->external_group = 0;
		external_inputs[index]->start_second = 140;
		external_inputs[index]->stop_second = 140 + 10*10;
		external_inputs[index]->first_file = 1;
		external_inputs[index]->last_file = 8;
		external_inputs[index]->files_read = 0;
		external_inputs[index]->loop_flag = true;
		external_inputs[index]->zero_pad_loop = 2;
		external_inputs[index]->zero_pad_pattern = 0;
		external_inputs[index]->wm_phase_flag = true;
		external_inputs[index]->wm_state = STIM_ORIG;
		external_inputs[index]->probe_counter = 0;
		external_inputs[index]->stim_counter = 0;
		external_inputs[index]->zp_counter = external_inputs[index]->zero_pad_pattern;		
		
		index++;
		   

	}

	
	if (argc >= 2)
		length = atoi(argv[1]);
	int* array = new int[length];	
#ifdef CHECKPOINT_RESTART
	for(int j = 0; j < INT_MAX; j++)
#else
	for(int j = 0; j < INT_MAX; j++)
#endif
	{
		if (comm::network_rank == 0)
		{
			if ( (num_external_neurons == 0) || (external_inputs.size() == 0) )
			{
				for (int32_t i = 0, v = j/20; i < length; ++i, v = v+10)
					array[i] = v;
				
				//cerr << "Bcast1" << endl;	
				MPI_Bcast(&length, 1, MPI_INT, 0, comm::exchange_comm);
				//cerr << "Bcast2" << endl;	
				MPI_Bcast(array, length, MPI_INT, 0, comm::exchange_comm);
				//DEBUG: cerr << "**** SENDER before 'barrier' ****" << array[9] <<endl;
				MPI_Barrier(comm::exchange_comm); // Without this, the external simulator may get ahead creating buffer overflow.
				//DEBUG: cerr << "**** SENDER after barrier ****" << array[9] <<endl;
			}
			else
			{
				if ( j%1000 == 0 )
				{
					//cerr << "about to read dummy spikes " << endl << flush;
					
					// READ DUMMYDATA FILE FOR THIS SECOND
					read_input_spikes_from_file ( floor( j/1000 ), spike_count_buffer, spike_buffer, external_groups_bounds, num_external_neurons, external_inputs);
				}
				MPI_Bcast( &spike_count_buffer[ j%1000 ], 1, MPI_INT, 0, comm::exchange_comm);
				MPI_Bcast( spike_buffer[ j%1000 ], spike_count_buffer[ j%1000 ], MPI_INT, 0, comm::exchange_comm);
				MPI_Barrier( comm::exchange_comm );
			}
		}
	}
	cerr << "******** sender is quitting" << endl;
	
	// FREE ALLOCATED MEMORY
	for (int i = 0; i < 1000; i++)
		free ( spike_buffer[i] );
	free ( spike_buffer );
	free ( spike_count_buffer );
	if ( external_groups_bounds != NULL )
	{
		for (int i = 0; i < num_external_groups; i++)
			free ( external_groups_bounds[i] );
		free ( external_groups_bounds );
	}
	
	// FINISH
	MPI_Finalize();
	return 0;
}
